************************************************************************************************************************************************
************************ Using Rich Lists to Study the Super-Rich and Top Wealth Inequality: Insights from Switzerland *************************
************************************************************************************************************************************************

// Start from a clean session: remove data and other objects from memory,
// turn off the -more- pause in the Results window, and select the s1color graph scheme.
clear all
set more off
set scheme s1color  

**Authors: Enea Baselgia and Isabel Z. Martinez
**Date: April 4, 2024

* LOAD THE BRACKET DATA INTO STATA *

* Bracket-format A: record the data set tag in a global, then read the "CH" sheet
* of the workbook, taking variable names from the first row.
global dataset dta_A
cd "$mypath/"
import excel "tws_2017_restat_updated.xlsx", sheet("CH") firstrow

* Keep only the rows with cantonid equal to 0
* (NOTE(review): presumably the national aggregate -- confirm against the workbook).
keep if cantonid==0

* Swap the p prefix of every bracket variable for tu; the suffixes are left as they are.
local suffixes _0 0_50 50_100 100_200 200_500 500_1000 1000_2000 2000_3000 3000_5000 5000_10000 _10000 _total
foreach sfx of local suffixes {
    rename p`sfx' tu`sfx'
}

* Rename the two open-ended brackets (suffixes _0 and _10000) of both the tu and
* the w variables in a single grouped rename.
rename (tu_0 tu_10000 w_0 w_10000) (tu0 tu_u10000 w0 w_u10000)


** merge population stat
// Build one row of population figures per year (2003-2019) from the rich-list
// data set, park it in a temporary file, and attach it to the bracket data by year.
// preserve/restore keeps the bracket data in memory untouched while the other file is open.
tempfile popstats

preserve
    cd "$mypath"
    // load only the needed variables and years in one step
    use year tot_taxunits tot_pop tot_adult_marr if inrange(year, 2003, 2019) ///
        using "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear
    // reduce to a single observation per year (mean of each population variable)
    collapse (mean) total_taxunits=tot_taxunits total_pop=tot_pop married_pop=tot_adult_marr, by(year)
    save "`popstats'"
restore

// nogen: do not keep the _merge indicator
merge 1:1 year using "`popstats'", nogen


** define brackets for Pareto-Interpolation

// Prepare the inputs for the Pareto interpolation (the interpolation itself is run further below).
// For that do-file to run, create a variable containing the LOWER bounds of all brackets,
// listed in descending order from 10000 down to 0.
// NOTE(review): input with a new variable name adds the variable to the data already in memory
// and fills observations in order; this presumably needs at least 10 observations -- confirm.
input brackets
10000
5000
3000
2000
1000
500
200 
100 
50
0
end

// create globals with the bracket lower bounds (order matters!)
// global b is global a without its first element (10000), i.e. the list shifted by one bracket.
// NOTE(review): presumably both are read by 5_b_Pareto_interpolation_TWS.do -- confirm there.
global a = " 10000 5000 3000 2000 1000 500 200  100  50 0"
global b = " 5000 3000 2000 1000 500 200  100  50 0 "

// Rename the bracket variables so that only the lower bound of the bracket
// remains in the name. The tu and the w variables follow the same naming
// scheme, so the identical sequence of renames is applied to each stub in turn.
// In the rename patterns, # matches one or more digits and (##), (###), ...
// match exactly that many digits.
foreach stub in tu w {
    // free the suffix 0 first, because the 0_50 bracket is about to take it
    rename `stub'0 `stub'00
    rename `stub'#_(##) `stub'#
    rename `stub'(##)_(###) `stub'(##)
    rename `stub'(###)_(###) `stub'(###)
    rename `stub'(###)_(####) `stub'(###)
    rename `stub'(####)_(####) `stub'(####)
    rename `stub'(####)_(#####) `stub'(####)
    // open-ended top bracket
    rename `stub'_u10000 `stub'10000
}

// every variable whose name starts with w gets the prefix wea instead
rename w* wea*

// check wealth variables: correct? what unit?
// Average wealth per tax unit is computed for every bracket and summarized in
// the log, so the unit of the wealth variables can actually be inspected.
// The helper variables are dropped again right away.
foreach b in 10000 5000 3000 2000 1000 500 200 100 50 0 00 {
    gen avgw_`b' = wea`b'/tu`b'
}
summarize avgw_*
drop avgw_*

/* 
--> wealth is recorded in millions of CHF. 
For Pareto to run correctly, everything must be in CHF! */

// convert to CHF
// Store the wealth variables as double before scaling: a float keeps only about
// 7 significant digits, which is not enough for amounts expressed in single CHF.
// (No effect if the variables are already stored as double.)
recast double wea*

foreach var of varlist wea* {
    replace `var' = `var' * 1000000
}


// RUN PARETO INTERPOLATION
// Change to the project directory (global mypath) and execute the interpolation
// do-file on the data currently in memory.
// NOTE(review): presumably relies on the brackets variable, the globals a, b and dataset,
// and the tu*/wea* variables prepared earlier in this file -- confirm in that do-file.
cd "$mypath"
do "5_b_Pareto_interpolation_TWS.do"

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

// CLEAN UP DATA SET
// Execute the clean-up do-file from the project directory.
// NOTE(review): what it keeps or drops is not visible from this file.
cd "$mypath/"
do "5_c_cleanup_TWS.do"


*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
// Save the data in memory as TWS_dataset.dta in the project directory,
// overwriting an existing file of that name, then clear memory.
cd "$mypath/"
save "TWS_dataset.dta", replace
*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** 
clear all

